### Replication of Study 3, Part B

fitmodel <- function(f){  # model fit function
  lm(f, data=fake.data)
}

#############################################################################################
### Experimental Conditions

for(S in 1:5){

n <- 1000
k <- 10
q <- 5
r2 <- .5
b.vec <- rep(c(.25, .5, .75, 1, 1.25)[S], q) # Coefficient of explanatory variable
     
s <- 1000 # Number of Simulations
result.array <- array(NA, c(6,8,s))

ptm <- proc.time()

for(g in 1:s){
  
  print(paste("Experiment: 3B", " Condition: ", S, " Simulation: ", g, sep=""))
  

#######################################################################################################################
### Simulate Data Set
c.vec <- rnorm(k, 0, .3) # add k random correlations
c.vec <- ifelse(c.vec>=1, .99, c.vec)
c.vec <- ifelse(c.vec<=-1, -.99, c.vec)

##### huerlimann
n <- k
z <- c.vec

M <- matrix(NA, n, n)
diag(M) <- 1
for(i in 1:n-1){
  for(j in (i+1):n){
    r <- rnorm(1, 0, .3) # ATTENTION HERE 
    r <- ifelse(r>=1, .99, r)
    r <- ifelse(r<=-1, -.99, r)
    M[i,j] <- r
  }
}

d <- n
for(n in 1:d){
  for(i in 1:d){
    M[i,n] <- M[n,i]
  }
}


for(i in 1:(n-1)){
  
  M[i,n] <- z[i]
  
}


### Generate empty nxn correlation matrix R

R <- matrix(NA, n, n)
diag(R) <- 1

### STEP 1 (H?rlimann 2014: 3.1)
for(i in 1:(n-1)){
  
  R[i,n] <- M[i,n] 
  
}

### STEP 2 (H?rlimann 2014: 3.2)
for(i in 1:(n-2)){
  
  R[i,n-1] <- M[i,n] * M[n-1, n] + M[i, n-1]*sqrt((1- M[i, n]^2)*(1- M[n-1, n]^2))
  
}

### STEP 3 (H?rlimann 2014: 3.3)
for(k in 2:(n-2)){
  for(i in 1:(n-k-1)){
    
    
    p.1 <- M[i,n] * M[n-k, n]   
    
    prod.f  <- function(j){
      l <- (n-j+2):n
      prod(sqrt((1- M[i, l]^2)*(1- M[n-k, l]^2)), na.rm=T)  
    }
    prod.f <- Vectorize(prod.f, "j")
    
    j <- 2:k
    p.2 <- sum(M[i, n-j+1] * M[n-k, n-j+1] * prod.f(j) )
    
    l <- (n-k+1):n
    p.3 <- prod(sqrt((1- M[i, l]^2)*(1- M[n-k, l]^2)), na.rm=T) 
    p.3 <- M[i, n-k] * p.3
    
    
    R[i, n-k] <- p.1 + p.2 + p.3
    
  }
}

##################################################################################################

for(n in 1:d){
  for(i in 1:d){
    R[i,n] <- R[n,i]
  }
}

M <- R
k <- 10
n <- 1000

X <- rmvnorm(n, mean=rep(0, k), sigma = M)

### DGP
y <- as.matrix(X[, 1:q]) %*% b.vec 
res <- rnorm(n, 0, 8) # Fix this bit to get SE of around .25 ! 
y <- y + res

fake.data <- as.data.frame(cbind(y, X))
colnames(fake.data) <- c("y", "x1", "x2", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10")
#colnames(fake.data) <- c("y", "x1", "x2", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10",
#                        "x11", "x12", "x13", "x14", "x15")
#                         ,"x16", "x17", "x18", "x19", "x20")

#######################################################################################################################

#### Generate Variable Combinations
X <- fake.data[,-1]

combs <- do.call(expand.grid, rep(list(c(FALSE, TRUE)), ncol(X)))[-1,]

### Assign Variable Names
vars <- apply(combs, 1, function(i) names(X)[i]) 

form <- paste("y ~ ", lapply(vars, paste, collapse = " + "), sep = "")                
form <- lapply(form, as.formula)                                                                 

### Fit Models
#ptm <- proc.time()
models <- lapply(form, fitmodel) # fit models
#proc.time() - ptm

### Post-Process Models
names(models) <- seq_along(models) # assign model ID
mods <- mod_summary(models) # summary of models
coefs <- coef_summary(models) # summary of coefficients

coefs_wide_b <- acast(coefs, model ~ variable, value.var=c("Est"))
coefs_wide_se <- acast(coefs, model ~ variable, value.var=c("SE"))
colnames(coefs_wide_se) <- paste("SE", colnames(coefs_wide_b) , sep="")
coefs <- cbind(coefs_wide_b, coefs_wide_se)
coefs <- coefs[,order(colnames(coefs), decreasing=T)]
model <- c(1:dim(mods)[1])
coefs <- cbind(model, coefs)

DATA <- merge(mods, coefs, by="model")
###############################################################################
# EBA vs. Model Averaging

K <- k
Q <- q
true.b <- c(b.vec, rep(0, K-Q))

## EBA (Leamer)
eb.min <- apply(DATA[,7:(6+K)], 2, min, na.rm=T)
eb.max <- apply(DATA[,7:(6+K)], 2, max, na.rm=T)

index.min <- apply(DATA[,7:(6+K)], 2, function(i){which(i==min(i,na.rm=T))})
index.max <- apply(DATA[,7:(6+K)], 2, function(i){which(i==max(i,na.rm=T))})

if(class(index.min)=="list"){
  index.min <- unlist(lapply(index.min, min))
}

if(class(index.max)=="list"){
  index.max <- unlist(lapply(index.max, max))
}

se.min <- DATA[,(7+K):(7+K+K-1)][cbind(index.min, c(1:K))]
se.max <- DATA[,(7+K):(7+K+K-1)][cbind(index.max, c(1:K))]

# Dreher
qu.1 <- apply(DATA[,7:(6+K)], 2, quantile, .1, na.rm=T)
qu.9 <- apply(DATA[,7:(6+K)], 2, quantile, .9, na.rm=T)

# Model Averaging
ma.ew <- apply(DATA[,7:(6+K)], 2, mean, na.rm=T) 
var.b.ew <- apply(DATA[,(7+K):(7+K+K-1)], 2, mean, na.rm=T) 
var.b.ew <- var.b.ew^2
var.ew <- apply(DATA[,7:(6+K)], 2, var, na.rm=T)
sd.ew <- sqrt(var.b.ew + var.ew)

bic <- DATA$BIC/1000 # rescale BIC 
w <- (exp(-bic/2))/(sum(exp(-bic/2))) # weight (cf. Moral-Benito 2010: 19)
ma.w.bic <- apply(DATA[,7:(6+K)], 2, function(i){sum(i * w, na.rm=T)})
var.b.w.bic <- apply(DATA[,(7+K):(7+K+K-1)], 2, function(i){sum((i)^2 * w, na.rm=T)})
var.w.bic <- apply(DATA[,7:(6+K)], 2, function(i){sum( (i-mean(i, na.rm=T))^2 * w, na.rm=T)})
sd.w.bic <- sqrt(var.b.w.bic + var.w.bic)

dev <- DATA$Deviance/1000 # rescale Deviance 
L <- exp(-dev/2) # Likelihood
w <- L/sum(L) # weight 
ma.w.ll <- apply(DATA[,7:(6+K)], 2, function(i){sum(i * w, na.rm=T)})
var.b.w.ll <- apply(DATA[,(7+K):(7+K+K-1)], 2, function(i){sum((i)^2 * w, na.rm=T)}) 
sd.w.ll <- sqrt(var.b.w.ll)

############################################################################################################################
# Robustness Criteria
in.out.eb.s <- 0<eb.min*eb.max & 1.96<abs(eb.min/se.min) & 1.96<abs(eb.max/se.max) # EBA: min and max same sign & significant
in.out.eb <- 0<eb.min*eb.max # EBA: min and max same sign
in.out.ma <- 1.96<abs(ma.ew/sd.ew) # Unweighted Model Averaging
in.out.ma.sim <- .95<apply(cbind(pnorm(0, ma.w.ll, sd.w.ll), 1-pnorm(0, ma.w.ll, sd.w.ll)), 1, max) # Sala-i-Martin: Model Averaging weighted by LL
in.out.ma.bic <- 1.96<abs(ma.w.bic/sd.w.bic) # Model Averaging weighted by BIC 
in.out.d90 <- 0<qu.1*qu.9 # Dreher: 90% quantiles same sign

beta.mat <- rbind(in.out.eb.s, in.out.eb, in.out.ma, in.out.ma.sim, in.out.ma.bic, in.out.d90, ma.ew, ma.w.ll, ma.w.bic, qu.1, eb.min)

colnames(beta.mat) <- c(9, 8, 7, 6, 5, 4, 3, 2, 10, 1)
#colnames(beta.mat) <- c(9, 8, 7, 6, 5, 4, 3, 2, 14, 13, 12, 11, 10, 1)

beta.mat <- beta.mat[,order(as.numeric(colnames(beta.mat)), decreasing=F)]

#############################################################################################################################
# Predictive Criteria

num.robust <- apply(beta.mat[1:6, 1:K], 1, sum) # Number of variables declared "robust"

  true.pos <- apply(beta.mat[1:6, 1:Q], 1, sum) # Number of True Positives
  false.pos <- apply(beta.mat[1:6, (Q+1):K], 1, sum) # Number of FP
  true.neg <- (K-Q)-false.pos
  false.neg <- Q-true.pos

tpr <- true.pos/(true.pos+false.neg) # TPR
tnr <- true.neg/(true.neg+false.pos) # TNR  

correct.sign.ma.ew <- sum(beta.mat["ma.ew",1:Q]>0)/sum(true.b[1:Q]>0)  ### Correct Sign
correct.sign.ma.w.ll <- sum(beta.mat["ma.w.ll",1:Q]>0)/sum(true.b[1:Q]>0)
correct.sign.ma.w.bic <- sum(beta.mat["ma.w.bic",1:Q]>0)/sum(true.b[1:Q]>0)

correct.sign.d90 <-  sum(beta.mat["in.out.d90", 1:Q]==1 & beta.mat["qu.1", 1:Q]>0)/sum(true.b[1:Q]>0)
correct.sign.eb <-  sum(beta.mat["in.out.eb", 1:Q]==1 & beta.mat["eb.min", 1:Q]>0)/sum(true.b[1:Q]>0)
correct.sign.eb.s <-  sum(beta.mat["in.out.eb.s", 1:Q]==1 & beta.mat["eb.min", 1:Q]>0)/sum(true.b[1:Q]>0)

correct.sign <- c(correct.sign.eb.s, correct.sign.eb, correct.sign.ma.ew, correct.sign.ma.w.ll, correct.sign.ma.w.bic, correct.sign.d90)

rmse.ew <- sqrt(mean((true.b - beta.mat["ma.ew",])^2)) ## RMSE 
rmse.w.ll <- sqrt(mean((true.b - beta.mat["ma.w.ll",])^2))
rmse.w.bic <- sqrt(mean((true.b - beta.mat["ma.w.bic",])^2))

rmse <- c(NA, NA, rmse.ew, rmse.w.ll, rmse.w.bic, NA)

corr.ew <- cor(true.b, beta.mat["ma.ew",]) ## Correlation with true b
corr.w.ll <- cor(true.b, beta.mat["ma.w.ll",])
corr.w.bic <- cor(true.b, beta.mat["ma.w.bic",])

corrs <- c(NA, NA, corr.ew, corr.w.ll, corr.w.bic, NA)

result.mat <- cbind(num.robust, true.pos, false.pos, true.neg, false.neg, correct.sign, rmse, corrs) 
result.array[,,g] <- result.mat  


}

time.needed <- proc.time()-ptm

colnames(result.array) <- c("num.robust", "true.pos", "false.pos", "true.neg", "false.neg", "correct.sign", "rmse", "corrs")
rownames(result.array) <- c("eb.s", "eb", "ma", "ma.sim", "ma.bic", "d90")


saveRDS(result.array, paste("five_norm_3_result_mat_", S, ".rds", sep=""))

}



